1 Effect of UPSTM-Based Decorrelation on Feature Discovery

1.0.1 Loading the libraries

# Load the libraries used throughout the report.
library("FRESA.CAD")   # IDeA/ILAA decorrelation, uniRankVar, heatMaps, FRESAScale, correlated_Remove
library(readxl)
library(igraph)        # feature-association network plot
library(umap)          # 2-D embeddings of raw vs decorrelated data
library(tsne)
library(entropy)       # entropy() / discretize() for information content
library(TH.data)       # provides the GlaucomaM data set
op <- par(no.readonly = TRUE)  # snapshot of graphics parameters; restored via par(op) after plots
pander::panderOptions('digits', 3)
pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

1.1 Material and Methods

All variables are derived from a laser scanning image of the eye background taken by the Heidelberg Retina Tomograph. Most of the variables describe either the area or volume in certain parts of the papilla and are measured in four sectors (temporal, superior, nasal and inferior) as well as for the whole papilla (global). The global measurement is, roughly, the sum of the measurements taken in the four sectors.

The observations in both groups are matched by age and sex to prevent any bias.

Source Torsten Hothorn and Berthold Lausen (2003), Double-Bagging: Combining classifiers by bootstrap aggregation. Pattern Recognition, 36(6), 1303–1309.

1.2 The Data

GlaucomaM {TH.data}


# Load the GlaucomaM data (98 glaucoma / 98 normal, age- and sex-matched)
data("GlaucomaM")

pander::pander(table(GlaucomaM$Class))
glaucoma normal
98 98

# Recode the outcome as numeric: 1 = glaucoma, 0 = normal
GlaucomaM$Class <- 1*(GlaucomaM$Class=="glaucoma")

1.2.0.1 Standardize the names for the reporting

# Analysis parameters used by the rest of the report.
studyName <- "GlaucomaM"
dataframe <- GlaucomaM   # working copy of the data
outcome <- "Class"       # name of the binary outcome column
thro <- 0.80             # correlation threshold for IDeA/ILAA decorrelation
TopVariables <- 10       # number of top-ranked features reported
cexheat <- 0.25          # label size for the heat maps (was assigned with '=')

1.3 Generating the report

1.3.1 Libraries

Some libraries

# Additional libraries used in the report sections below.
library(psych)      # fa(): exploratory factor analysis
library(whitening)
library("vioplot")
library("rpart")    # CART models for the modeling comparison
# stringr was missing but its functions (str_detect, str_remove_all)
# are called bare later in the report.
library(stringr)

1.3.2 Data specs

# Data dimensions (rows, predictor columns) and outcome distribution
pander::pander(c(rows=nrow(dataframe),col=ncol(dataframe)-1))
rows col
196 62
pander::pander(table(dataframe[,outcome]))
0 1
98 98

# Candidate predictor list: every column except the outcome
varlist <- colnames(dataframe)
varlist <- varlist[varlist != outcome]

# Skip the expensive heat maps when there are too many features
largeSet <- length(varlist) > 1500 

1.3.3 Scaling the data

Scaling and removing near-zero-variance columns and highly co-linear (r > 0.99999) columns


  ### Some global cleaning
  # Drop near-constant columns (standard deviation effectively zero)
  sdiszero <- apply(dataframe,2,sd) > 1.0e-16
  dataframe <- dataframe[,sdiszero]

  # Remove almost perfectly collinear features (|r| > 0.99999); keep the outcome
  varlist <- colnames(dataframe)[colnames(dataframe) != outcome]
  tokeep <- c(as.character(correlated_Remove(dataframe,varlist,thr=0.99999)),outcome)
  dataframe <- dataframe[,tokeep]

  varlist <- colnames(dataframe)
  varlist <- varlist[varlist != outcome]
  
  # Mark columns with at least 5 distinct values as continuous;
  # used later to select columns for PCA/EFA (excludes the binary outcome)
  iscontinous <- sapply(apply(dataframe,2,unique),length) >= 5 ## Only variables with enough samples



# Rank-based logit scaling of all columns (FRESA.CAD)
dataframeScaled <- FRESAScale(dataframe,method="OrderLogit")$scaledData

1.4 The heatmap of the data

# Plot at most 1000 samples in the heat maps
numsub <- nrow(dataframe)
if (numsub > 1000) numsub <- 1000


if (!largeSet)
{
  # Heat map of the scaled data; samples (rows) are clustered
  hm <- heatMaps(data=dataframeScaled[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 xlab="Feature",
                 ylab="Sample",
                 srtCol=45,
                 srtRow=45,
                 cexCol=cexheat,
                 cexRow=cexheat
                 )
  par(op)  # restore graphics parameters
}

1.4.0.1 Correlation Matrix of the Data

The heat map of the correlation matrix of the data


if (!largeSet)
{
  # Pairwise Pearson correlation of the raw predictors
  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  #cormat <- Rfast::cora(as.matrix(dataframe[,varlist]),large=TRUE)
  cormat <- cor(dataframe[,varlist],method="pearson")
  cormat[is.na(cormat)] <- 0
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Original Correlation",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  # Largest off-diagonal |r| (diagonal zeroed first)
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.9961105

1.5 The decorrelation


DEdataframe <- IDeA(dataframe,verbose=TRUE,thr=thro)
#> 
#>  mr vbrs varg vasg tmg mhcs 
#>        ag        at        as        an        ai       eag 
#> 0.9836066 0.9508197 0.8852459 0.9672131 0.9180328 0.7049180 
#> 
#>  Included: 61 , Uni p: 0.002419355 , Base Size: 16 , Rcrit: 0.2004905 
#> 
#> 
 1 <R=0.996,thr=0.950>, Top: 10< 5 >[Fa= 10 ]( 10 , 16 , 0 ),<|>Tot Used: 26 , Added: 16 , Zero Std: 0 , Max Cor: 0.946
#> 
 2 <R=0.946,thr=0.900>, Top: 7< 5 >[Fa= 10 ]( 7 , 14 , 10 ),<|>Tot Used: 37 , Added: 14 , Zero Std: 0 , Max Cor: 0.892
#> 
 3 <R=0.892,thr=0.800>, Top: 11< 2 >[Fa= 17 ]( 11 , 14 , 10 ),<|>Tot Used: 52 , Added: 14 , Zero Std: 0 , Max Cor: 0.799
#> 
 4 <R=0.799,thr=0.800>
#> 
 [ 4 ], 0.7689698 Decor Dimension: 52 Nused: 52 . Cor to Base: 29 , ABase: 61 , Outcome Base: 0 
#> 
# Names of the decorrelated predictors (latent features carry the "La_" prefix)
varlistc <- colnames(DEdataframe)[colnames(DEdataframe) != outcome]

# Total feature variance before decorrelation
pander::pander(sum(apply(dataframe[,varlist],2,var)))

3.03

# Total feature variance after decorrelation
pander::pander(sum(apply(DEdataframe[,varlistc],2,var)))

0.678

# 256-bin entropy of the pooled raw values
pander::pander(entropy(discretize(unlist(dataframe[,varlist]), 256)))

3.94

# 256-bin entropy of the pooled decorrelated values
pander::pander(entropy(discretize(unlist(DEdataframe[,varlistc]), 256)))

3.74

1.5.1 The decorrelation matrix


if (!largeSet)
{
  # Sparsity pattern of the decorrelation transformation matrix
  par(cex=0.6,cex.main=0.85,cex.axis=0.7)
  
  # UPLTM: transformation matrix stored by IDeA on its result
  UPLTM <- attr(DEdataframe,"UPLTM")
  
  gplots::heatmap.2(1.0*(abs(UPLTM)>0),  # 1 wherever a coefficient is present
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Decorrelation matrix",
                    cexRow = cexheat,
                    cexCol = cexheat,
                   srtCol=45,
                   srtRow=45,
                    key.title=NA,
                    key.xlab="|Beta|>0",
                    xlab="Output Feature", ylab="Input Feature")
  
  par(op)
  
  
  
}

1.5.2 Formulas Network

Displaying the features associations

par(op)

if (ncol(dataframe) < 1000)
{
  # Bootstrapped ILAA decorrelation: a stable estimate of the sparse
  # transformation (UPLTM attribute) between observed and latent features.
  DEdataframeB <- ILAA(dataframe,verbose=TRUE,thr=thro,bootstrap=30)

  # Prune the transform's sparsity pattern so the network plot stays
  # readable.  One pass keeps rows with more than rowthr links or columns
  # with more than 1 + frac*nrow(tm) links, then drops nodes left with
  # minlinks or fewer connections.  (Replaces three copy-pasted stages.)
  pruneTransform <- function(tm, frac, rowthr, minlinks) {
    links <- 1*(tm != 0)
    thrcol <- 1 + frac*nrow(tm)
    rsum <- apply(links,1,sum) > rowthr
    csum <- apply(links,2,sum) > thrcol | rsum
    tm <- tm[csum,csum]
    links <- 1*(tm != 0)
    csum <- (apply(links,2,sum) > minlinks) & (apply(links,1,sum) > minlinks)
    tm[csum,csum]
  }

  # Binary adjacency pattern of the bootstrapped transform
  transform <- 1*(attr(DEdataframeB,"UPLTM") != 0)
  print(ncol(transform))
  # Progressively harsher pruning while more than ~100 nodes remain
  transform <- pruneTransform(transform, 0.025, 2, 1)
  print(ncol(transform))
  if (ncol(transform) > 100)
  {
    transform <- pruneTransform(transform, 0.10, 4, 3)
  }
  print(ncol(transform))
  if (ncol(transform) > 100)
  {
    transform <- pruneTransform(transform, 0.20, 8, 7)
  }
  print(ncol(transform))

  if ((ncol(transform) > 10) && (ncol(transform) < 150))
  {
    # Sparsity pattern of the reduced transformation matrix
    gplots::heatmap.2(transform,
                  trace = "none",
                  mar = c(5,5),
                  col=rev(heat.colors(5)),
                  main = "Red Decorrelation matrix",
                  cexRow = cexheat,
                  cexCol = cexheat,
                  srtCol=45,
                  srtRow=45,
                  key.title=NA,
                  key.xlab="|Beta|>0",
                  xlab="Output Feature", ylab="Input Feature")

    par(op)

    # Directed feature-association network; node size follows mean
    # column connectivity, communities from optimal modularity clustering
    colnames(transform) <- str_remove_all(colnames(transform),"La_")

    VertexSize <- apply(transform,2,mean)
    VertexSize <- 5*VertexSize/max(VertexSize)

    gr <- graph_from_adjacency_matrix(transform,mode = "directed",diag = FALSE,weighted=TRUE)
    gr$layout <- layout_with_fr

    fc <- cluster_optimal(gr)
    plot(fc, gr,
         edge.width = 0.5*E(gr)$weight,
         vertex.size=VertexSize,
         edge.arrow.size=0.5,
         edge.arrow.width=0.5,
         vertex.label.cex=0.65,
         vertex.label.dist=1,
         main="Feature Association")
  }
}
#> fast | LM |
#>  mr vbrs varg vasg tmg mhcs 
#>        ag        at        as        an        ai       eag 
#> 0.9836066 0.9508197 0.8852459 0.9672131 0.9180328 0.7049180 
#> 
#>  Included: 61 , Uni p: 0.002419355 , Base Size: 16 , Rcrit: 0.2004905 
#> 
#> 
 1 <R=0.996,thr=0.950>, Top: 10< 5 >[Fa= 10 ]( 10 , 16 , 0 ),<|>Tot Used: 26 , Added: 16 , Zero Std: 0 , Max Cor: 0.946
#> 
 2 <R=0.946,thr=0.900>, Top: 7< 5 >[Fa= 10 ]( 7 , 14 , 10 ),<|>Tot Used: 37 , Added: 14 , Zero Std: 0 , Max Cor: 0.892
#> 
 3 <R=0.892,thr=0.800>, Top: 11< 2 >[Fa= 17 ]( 11 , 14 , 10 ),<|>Tot Used: 52 , Added: 14 , Zero Std: 0 , Max Cor: 0.799
#> 
 4 <R=0.799,thr=0.800>
#> 
 [ 4 ], 0.7689698 Decor Dimension: 52 Nused: 52 . Cor to Base: 29 , ABase: 61 , Outcome Base: 0 
#> 
bootstrapping->..............................
#> 
[1] 52
#> [1] 18
#> [1] 18
#> [1] 18


par(op)

1.6 The heatmap of the decorrelated data

if (!largeSet)
{
  # Heat map of the decorrelated data (same settings as the raw-data map)
  hm <- heatMaps(data=DEdataframe[1:numsub,],
                 Outcome=outcome,
                 Scale=TRUE,
                 hCluster = "row",
                 cexRow = cexheat,
                 cexCol = cexheat,
                 srtCol=45,
                 srtRow=45,
                 xlab="Feature",
                 ylab="Sample")
  par(op)
}

1.7 The correlation matrix after decorrelation

if (!largeSet)
{
  # Residual correlation among the decorrelated features.
  # NOTE(review): the plot title says "ILAA" but DEdataframe was produced
  # by IDeA above -- confirm which label is intended.
  cormat <- cor(DEdataframe[,varlistc],method="pearson")
  cormat[is.na(cormat)] <- 0
  
  gplots::heatmap.2(abs(cormat),
                    trace = "none",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "Correlation after ILAA",
                    cexRow = cexheat,
                    cexCol = cexheat,
                     srtCol=45,
                     srtRow=45,
                    key.title=NA,
                    key.xlab="|Pearson Correlation|",
                    xlab="Feature", ylab="Feature")
  
  par(op)
  # Largest remaining off-diagonal |r|; expected at or below thro (0.80)
  diag(cormat) <- 0
  print(max(abs(cormat)))
}

[1] 0.799364

1.8 U-MAP Visualization of features

1.8.1 The UMAP based on LASSO on Raw Data


if (nrow(dataframe) < 1000)
{
  # One color per outcome class; note that raincolors is indexed by
  # position (outcome value + 1), not by its assigned names.
  classes <- unique(dataframe[1:numsub,outcome])
  raincolors <- rainbow(length(classes))
  names(raincolors) <- classes
  # 2-D UMAP embedding of the standardized raw features,
  # points drawn as their class labels
  datasetframe.umap = umap(scale(dataframe[1:numsub,varlist]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: Original",t='n')
  text(datasetframe.umap$layout,labels=dataframe[1:numsub,outcome],col=raincolors[dataframe[1:numsub,outcome]+1])
}

1.8.2 The decorrelated UMAP

if (nrow(dataframe) < 1000)
{
  # UMAP of the decorrelated features
  # (reuses raincolors defined in the previous chunk)
  datasetframe.umap = umap(scale(DEdataframe[1:numsub,varlistc]),n_components=2)
  plot(datasetframe.umap$layout,xlab="U1",ylab="U2",main="UMAP: After ILAA",t='n')
  text(datasetframe.umap$layout,labels=DEdataframe[1:numsub,outcome],col=raincolors[DEdataframe[1:numsub,outcome]+1])
}

1.9 Univariate Analysis

1.9.1 Univariate



# Univariate AUC ranking of the raw features against the outcome
univarRAW <- uniRankVar(varlist,
               paste(outcome,"~1"),
               outcome,
               dataframe,
               rankingTest="AUC")


# Univariate AUC ranking of the decorrelated features
# (a stray trailing comma, which passed an empty argument, was removed)
univarDe <- uniRankVar(varlistc,
               paste(outcome,"~1"),
               outcome,
               DEdataframe,
               rankingTest="AUC")

1.9.2 Final Table


# Columns reported in the univariate tables
univariate_columns <- c("caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC")

## Logical mask selecting the TopVariables highest-ranked features.
# seq_along() is safe for a zero-length varlist, unlike c(1:length(varlist)).
topvar <- seq_along(varlist) <= TopVariables
tableRaw <- univarRAW$orderframe[topvar,univariate_columns]
pander::pander(tableRaw)
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
vari 0.04480 0.0368 0.1150 0.0543 0.1035 0.880
varg 0.17851 0.1170 0.4139 0.1967 0.0252 0.873
vars 0.04506 0.0314 0.1068 0.0596 0.0172 0.851
tmi 0.03664 0.1258 -0.1097 0.1038 0.6980 0.832
varn 0.08224 0.0595 0.1774 0.0894 0.2533 0.830
hic 0.39863 0.1407 0.2114 0.1574 0.7659 0.822
tmg -0.03356 0.0885 -0.1524 0.0927 0.5179 0.818
phcg -0.03546 0.0691 -0.1216 0.0661 0.9359 0.818
phci 0.00898 0.0882 -0.0937 0.0779 0.7638 0.814
tms 0.03959 0.1166 -0.1192 0.1376 0.9756 0.808


# Latent ("La_"-prefixed) features in AUC rank order; the final list keeps
# the overall top variables plus the best TopVariables/2 latent features
topLAvar <- univarDe$orderframe$Name[str_detect(univarDe$orderframe$Name,"La_")]
topLAvar <- unique(c(univarDe$orderframe$Name[topvar],topLAvar[1:as.integer(TopVariables/2)]))
finalTable <- univarDe$orderframe[topLAvar,univariate_columns]


pander::pander(finalTable)
  caseMean caseStd controlMean controlStd controlKSP ROCAUC
varg 0.17851 0.11700 0.4139 0.1967 2.52e-02 0.873
hic 0.39863 0.14073 0.2114 0.1574 7.66e-01 0.822
tmg -0.03356 0.08846 -0.1524 0.0927 5.18e-01 0.818
phcg -0.03546 0.06909 -0.1216 0.0661 9.36e-01 0.818
phci 0.00898 0.08818 -0.0937 0.0779 7.64e-01 0.814
rnf 0.13956 0.06764 0.2252 0.0975 2.25e-01 0.800
phcn 0.00692 0.07364 -0.0717 0.0881 3.03e-01 0.796
vart 0.00640 0.00533 0.0146 0.0117 7.54e-04 0.777
vbrs 0.16142 0.09970 0.0861 0.1319 4.75e-06 0.773
La_mdic 0.07786 0.05356 0.0351 0.0380 1.36e-01 0.766
La_vbrt -0.02477 0.01482 -0.0345 0.0204 2.83e-01 0.713
La_as -0.61158 0.02849 -0.5903 0.0285 8.42e-01 0.709
La_vbri -0.01438 0.03870 -0.0394 0.0364 8.08e-02 0.698
La_eas -0.72443 0.05906 -0.7891 0.1222 4.93e-03 0.697

# Latent-variable formulas and decorrelation f-scores from the IDeA result
dc <- getLatentCoefficients(DEdataframe)
fscores <- attr(DEdataframe,"fscore")


# Mean formula size, number of latent features, and their fraction of predictors
pander::pander(c(mean=mean(sapply(dc,length)),total=length(dc),fraction=length(dc)/(ncol(dataframe)-1)))
mean total fraction
2.07 42 0.677

theCharformulas <- attr(dc,"LatentCharFormulas")


# NOTE(review): topvar is a logical vector, so topvar %in% topLAvar
# (character names) is always FALSE and the whole tableRaw[topvar,] is
# appended; confirm whether rownames(tableRaw) %in% topLAvar was intended.
finalTable <- rbind(finalTable,tableRaw[topvar[!(topvar %in% topLAvar)],univariate_columns])


# Attach the raw-data AUC, the decorrelation formula and the f-score per feature
orgnamez <- rownames(finalTable)
orgnamez <- str_remove_all(orgnamez,"La_")
finalTable$RAWAUC <- univarRAW$orderframe[orgnamez,"ROCAUC"]
finalTable$DecorFormula <- theCharformulas[rownames(finalTable)]
finalTable$fscores <- fscores[rownames(finalTable)]

Final_Columns <- c("DecorFormula","caseMean","caseStd","controlMean","controlStd","controlKSP","ROCAUC","RAWAUC","fscores")

# Sort by decorrelated AUC and report
finalTable <- finalTable[order(-finalTable$ROCAUC),]
pander::pander(finalTable[,Final_Columns])
  DecorFormula caseMean caseStd controlMean controlStd controlKSP ROCAUC RAWAUC fscores
vari NA 0.04480 0.03683 0.1150 0.0543 1.04e-01 0.880 0.880 NA
varg NA 0.17851 0.11700 0.4139 0.1967 2.52e-02 0.873 0.873 3
varg1 NA 0.17851 0.11700 0.4139 0.1967 2.52e-02 0.873 NA NA
vars NA 0.04506 0.03143 0.1068 0.0596 1.72e-02 0.851 0.851 NA
tmi NA 0.03664 0.12578 -0.1097 0.1038 6.98e-01 0.832 0.832 NA
varn NA 0.08224 0.05953 0.1774 0.0894 2.53e-01 0.830 0.830 NA
hic NA 0.39863 0.14073 0.2114 0.1574 7.66e-01 0.822 0.822 NA
hic1 NA 0.39863 0.14073 0.2114 0.1574 7.66e-01 0.822 NA NA
tmg NA -0.03356 0.08846 -0.1524 0.0927 5.18e-01 0.818 0.818 2
tmg1 NA -0.03356 0.08846 -0.1524 0.0927 5.18e-01 0.818 NA NA
phcg NA -0.03546 0.06909 -0.1216 0.0661 9.36e-01 0.818 0.818 NA
phcg1 NA -0.03546 0.06909 -0.1216 0.0661 9.36e-01 0.818 NA NA
phci NA 0.00898 0.08818 -0.0937 0.0779 7.64e-01 0.814 0.814 1
phci1 NA 0.00898 0.08818 -0.0937 0.0779 7.64e-01 0.814 NA NA
tms NA 0.03959 0.11659 -0.1192 0.1376 9.76e-01 0.808 0.808 NA
rnf NA 0.13956 0.06764 0.2252 0.0975 2.25e-01 0.800 0.800 NA
phcn NA 0.00692 0.07364 -0.0717 0.0881 3.03e-01 0.796 0.796 1
vart NA 0.00640 0.00533 0.0146 0.0117 7.54e-04 0.777 0.777 NA
vbrs NA 0.16142 0.09970 0.0861 0.1319 4.75e-06 0.773 0.773 4
La_mdic - (0.276)vbsg + mdic 0.07786 0.05356 0.0351 0.0380 1.36e-01 0.766 0.785 0
La_vbrt - (0.947)vbst + vbrt -0.02477 0.01482 -0.0345 0.0204 2.83e-01 0.713 0.709 -1
La_as + as - (1.384)mr -0.61158 0.02849 -0.5903 0.0285 8.42e-01 0.709 0.501 -1
La_vbri - (0.209)vbsg + vbri -0.01438 0.03870 -0.0394 0.0364 8.08e-02 0.698 0.803 -1
La_eas + eas - (1.374)mr -0.72443 0.05906 -0.7891 0.1222 4.93e-03 0.697 0.621 -1

1.10 Comparing ILAA vs PCA vs EFA

1.10.1 PCA

# PCA on the continuous columns (centered and scaled)
# NOTE(review): featuresnames is not used below
featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[,iscontinous],center = TRUE,scale. = TRUE)   #principal components
predPCA <- predict(pc,dataframe[,iscontinous])
# Rebuild the data set: PC scores plus the non-continuous columns
# (which include the binary outcome)
PCAdataframe <- as.data.frame(cbind(predPCA,dataframe[,!iscontinous]))
colnames(PCAdataframe) <- c(colnames(predPCA),colnames(dataframe)[!iscontinous]) 
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)

#pander::pander(pc$rotation)


# Correlation among the PCA-transformed features
PCACor <- cor(PCAdataframe[,colnames(PCAdataframe) != outcome])


  gplots::heatmap.2(abs(PCACor),
                    trace = "none",
  #                  scale = "row",
                    mar = c(5,5),
                    col=rev(heat.colors(5)),
                    main = "PCA Correlation",
                    cexRow = 0.5,
                    cexCol = 0.5,
                     srtCol=45,
                     srtRow= -45,
                    key.title=NA,
                    key.xlab="Pearson Correlation",
                    xlab="Feature", ylab="Feature")

1.10.2 EFA


# EFA (varimax) on the continuous columns; factor scores replace them.
# Falls back to the scaled data when there are too many columns.
EFAdataframe <- dataframeScaled

if (length(iscontinous) < 2000)
{
  # Number of factors capped by columns, rows and half the PCA dimension.
  # NOTE(review): length(iscontinous) counts ALL columns; if the number of
  # continuous columns was intended, that would be sum(iscontinous) -- confirm.
  topred <- min(length(iscontinous),nrow(dataframeScaled),ncol(predPCA)/2)
  if (topred < 2) topred <- 2
  
  uls <- fa(dataframeScaled[,iscontinous],nfactors=topred,rotate="varimax",warnings=FALSE)  # EFA analysis
  predEFA <- predict(uls,dataframeScaled[,iscontinous])
  # Factor scores plus the non-continuous columns (incl. the outcome)
  EFAdataframe <- as.data.frame(cbind(predEFA,dataframeScaled[,!iscontinous]))
  colnames(EFAdataframe) <- c(colnames(predEFA),colnames(dataframeScaled)[!iscontinous]) 


  
  # Correlation among the EFA features
  EFACor <- cor(EFAdataframe[,colnames(EFAdataframe) != outcome])
  
  
    gplots::heatmap.2(abs(EFACor),
                      trace = "none",
    #                  scale = "row",
                      mar = c(5,5),
                      col=rev(heat.colors(5)),
                      main = "EFA Correlation",
                      cexRow = 0.5,
                      cexCol = 0.5,
                       srtCol=45,
                       srtRow= -45,
                      key.title=NA,
                      key.xlab="Pearson Correlation",
                      xlab="Feature", ylab="Feature")
}

1.11 Effect on CAR modeling

par(op)
par(xpd = TRUE)  # allow tree labels to draw outside the plot region
# Depth-3 CART on the raw features; resubstitution performance below
dataframe[,outcome] <- factor(dataframe[,outcome])
rawmodel <- rpart(paste(outcome,"~."),dataframe,control=rpart.control(maxdepth=3))
pr <- predict(rawmodel,dataframe,type = "class")

  # Diagnostic statistics (epiR) only when the tree predicts both classes
  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(rawmodel,main="Raw",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(rawmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,dataframe[,outcome]==0))
  }


# Confusion matrix, then accuracy / sensitivity / specificity / odds ratio
pander::pander(table(dataframe[,outcome],pr))
  0 1
0 83 15
1 11 87
pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.867 0.812 0.911
3 se 0.888 0.808 0.943
4 sp 0.847 0.760 0.912
6 diag.or 43.764 19.005 100.778

par(op)
par(xpd = TRUE)
# Depth-3 CART on the IDeA-decorrelated features
DEdataframe[,outcome] <- factor(DEdataframe[,outcome])
IDeAmodel <- rpart(paste(outcome,"~."),DEdataframe,control=rpart.control(maxdepth=3))
pr <- predict(IDeAmodel,DEdataframe,type = "class")

  # Diagnostic statistics only when the tree predicts both classes
  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(IDeAmodel,main="ILAA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(IDeAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,DEdataframe[,outcome]==0))
  }

pander::pander(table(DEdataframe[,outcome],pr))
  0 1
0 89 9
1 10 88
pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.903 0.853 0.941
3 se 0.898 0.820 0.950
4 sp 0.908 0.833 0.957
6 diag.or 87.022 33.739 224.456

par(op)
par(xpd = TRUE)
# Depth-3 CART on the PCA features
PCAdataframe[,outcome] <- factor(PCAdataframe[,outcome])
PCAmodel <- rpart(paste(outcome,"~."),PCAdataframe,control=rpart.control(maxdepth=3))
pr <- predict(PCAmodel,PCAdataframe,type = "class")
# Diagnostic statistics only when the tree predicts both classes
ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
if (length(unique(pr))>1)
{
  plot(PCAmodel,main="PCA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
  text(PCAmodel, use.n = TRUE,cex=0.75)
  ptab <- epiR::epi.tests(table(pr==0,PCAdataframe[,outcome]==0))
}

pander::pander(table(PCAdataframe[,outcome],pr))
  0 1
0 85 13
1 20 78
pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.832 0.772 0.881
3 se 0.796 0.703 0.871
4 sp 0.867 0.784 0.927
6 diag.or 25.500 11.891 54.684


par(op)

1.11.1 EFA


  # Depth-3 CART on the EFA factor scores
  EFAdataframe[,outcome] <- factor(EFAdataframe[,outcome])
  EFAmodel <- rpart(paste(outcome,"~."),EFAdataframe,control=rpart.control(maxdepth=3))
  pr <- predict(EFAmodel,EFAdataframe,type = "class")
  
  # Diagnostic statistics only when the tree predicts both classes
  ptab <- list(er="Error",detail=matrix(nrow=6,ncol=1))
  if (length(unique(pr))>1)
  {
    plot(EFAmodel,main="EFA",branch=0.5,uniform = TRUE,compress = TRUE,margin=0.1)
    text(EFAmodel, use.n = TRUE,cex=0.75)
    ptab <- epiR::epi.tests(table(pr==0,EFAdataframe[,outcome]==0))
  }


  pander::pander(table(EFAdataframe[,outcome],pr))
  0 1
0 86 12
1 23 75
  pander::pander(ptab$detail[c(5,3,4,6),])
  statistic est lower upper
5 diag.ac 0.821 0.761 0.872
3 se 0.765 0.669 0.845
4 sp 0.878 0.796 0.935
6 diag.or 23.370 10.890 50.149
  par(op)